import json
import re

# Characters that get_hphm accepts as the first character of a plate number.
# Kept as a list of single-character strings, in the original order.
PLATE_CHAR = list(
    "京沪津渝冀晋蒙辽吉黑苏浙皖闽赣鲁豫鄂湘粤桂"
    "琼川贵云藏陕甘青宁新港学使警澳挂"
)




def get_hphm(str):
    """Return the first plate-number-like substring found in *str*.

    A match is a character from ``PLATE_CHAR`` immediately followed by six
    characters that are each an uppercase ASCII letter or a digit. The
    seven-character substring of the leftmost match is returned.

    Returns the integer ``0`` when no such substring exists.

    NOTE: the parameter name shadows the builtin ``str``; it is kept so that
    callers passing it by keyword continue to work.
    """
    # Only positions with at least six characters from there to the end of
    # the text are scanned (same bound as the original index check).
    for start in range(len(str) - 5):
        if str[start] not in PLATE_CHAR:
            continue
        tail = str[start + 1:start + 7]
        # A tail shorter than six characters cannot satisfy the pattern.
        if re.match(r'[A-Z0-9]{6}', tail):
            return str[start:start + 7]
    return 0


# Rewrite a label file into train_hphm.txt: for every record, the first
# annotation's 'transcription' is replaced by the plate number that get_hphm
# extracts from the record's image path.
file_path = 'D:/BaiduNetdiskDownload/hphm_02_0618_6000/02/Label.txt'

# Read the whole input before the output file is opened, so that a failure
# while reading does not create or truncate train_hphm.txt.
with open(file_path, "r", encoding='utf-8') as label_file:
    label_lines = label_file.readlines()

# The context manager guarantees the output is flushed and closed even if a
# record fails to parse part-way through.
with open("train_hphm.txt", "w", encoding='utf-8') as out_file:
    for line in label_lines:
        # Lines of 10 characters or fewer are skipped.
        if len(line) <= 10:
            continue
        # Each record is "<image path>\t<JSON list of annotations>".
        pic, content = line.split("\t")
        annotations = json.loads(content)
        # get_hphm returns 0 when the path holds no plate number; that value
        # is written through unchanged.
        annotations[0]['transcription'] = get_hphm(pic)
        print(pic)

        out_file.write(pic + '\t' + json.dumps(annotations, ensure_ascii=False) + "\n")
